package cn.datawin.seoTest;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.nodes.Element;

import cn.datawin.spider.httputil.HttpResponse;
import cn.datawin.spider.seletor.AbstractSelectable;
import cn.datawin.spider.seletor.Selectable;
import cn.datawin.spider.seletor.Html;
import cn.datawin.spider.util.HttpUtil;
import cn.datawin.spider.util.UrlUtil;

/**
 * On-site (internal) SEO optimization checks for a single page.
 * 
 * @author sdf
 */
public class InsideOptimizeAnalyze {
	
	/* All links collected from the page (filled by setLinks(Html, String)). */
	private Set<String> links = new HashSet<String>();
	
	/* Own links: entries of links whose URL contains the site domain (see classifylinks). */
	private Set<String> ownlinks = new HashSet<String>();
	
	/* Friend links: entries of links that do not contain the site domain. */
	private Set<String> flinks = new HashSet<String>();
	/* Page source code. */
	private String html;
	
	private String other;
	/* Home page title. */
	private String title;
	/* Home page keywords (content of the "keywords" meta element). */
	private String keywords;
	/* Home page description (content of the "description" meta element). */
	private String description;
	/* Character encoding. */
	private String charset;
	/* Number of "#" placeholder links. */
	private int emptyLinkNum;
	/* Number of duplicate links. */
	private int sameLinkNum;
	// Domain name (set by getDomain).
	private String domain;
	// URL of the current page.
	private String cururl;
	// Whether ICP filing information was found (set to true by recordAnalyze).
	private boolean isbeian=false;
	
	/**
	 * Creates an empty analyzer; no page data is parsed, so the fields must be
	 * populated through the setters before any check is run.
	 */
	public InsideOptimizeAnalyze() {
	}
	
	/**
	 * Parses the page source and pre-computes the data the checks read:
	 * title, keywords, description, charset, domain and the link sets.
	 *
	 * @param html page source code
	 * @param url  URL the page was loaded from
	 * @throws CloneNotSupportedException if the parsed document cannot be cloned
	 */
	public InsideOptimizeAnalyze(String html,String url) throws CloneNotSupportedException {
		this.html = html;
		Html h = new Html(html);
		this.cururl=url;
		// Each query runs on a fresh clone of the parsed document.
		// NOTE(review): presumably $() narrows/mutates its receiver - confirm against Html.
		this.title = ((Html)h.clone()).$("title").text();
		this.keywords = ((Html)h.clone()).$("[name=keywords]").attr("content");
		this.description = ((Html)h.clone()).$("[name=description]").attr("content");
		// "UTF-8" when the meta elements mention "UTF" (case-insensitive), otherwise "GBK".
		this.charset = ((Html)h.clone()).$("meta").toString().toUpperCase().contains("UTF") ? "UTF-8" : "GBK";
		// getDomain stores the domain in this.domain; classifylinks depends on it,
		// so the order of the next three calls matters.
		getDomain(url);
		setLinks(h, url);
		classifylinks();
		
	}
	
	/** Returns all links collected from the page. */
	public Set<String> getLinks() {
		return links;
	}

	/** Replaces the set of all links. */
	public void setLinks(Set<String> links) {
		this.links = links;
	}

	/** Returns the links that belong to the site's own domain. */
	public Set<String> getOwnlinks() {
		return ownlinks;
	}

	/** Replaces the set of own-domain links. */
	public void setOwnlinks(Set<String> ownlinks) {
		this.ownlinks = ownlinks;
	}

	/** Returns the friend links (links outside the site's own domain). */
	public Set<String> getFlinks() {
		return flinks;
	}

	/** Replaces the set of friend links. */
	public void setFlinks(Set<String> flinks) {
		this.flinks = flinks;
	}

	/** Returns the page source code. */
	public String getHtml() {
		return html;
	}

	/** Sets the page source code. */
	public void setHtml(String html) {
		this.html = html;
	}

	/** Returns the value of the {@code other} field. */
	public String getOther() {
		return other;
	}

	/** Sets the value of the {@code other} field. */
	public void setOther(String other) {
		this.other = other;
	}

	/** Returns the home page title. */
	public String getTitle() {
		return title;
	}

	/** Sets the home page title. */
	public void setTitle(String title) {
		this.title = title;
	}

	/** Returns the home page keywords. */
	public String getKeywords() {
		return keywords;
	}

	/** Sets the home page keywords. */
	public void setKeywords(String keywords) {
		this.keywords = keywords;
	}

	/** Returns the home page description. */
	public String getDescription() {
		return description;
	}

	/** Sets the home page description. */
	public void setDescription(String description) {
		this.description = description;
	}

	/** Returns the character encoding name. */
	public String getCharset() {
		return charset;
	}

	/** Sets the character encoding name. */
	public void setCharset(String charset) {
		this.charset = charset;
	}

	/** Returns the number of "#" placeholder links. */
	public int getEmptyLinkNum() {
		return emptyLinkNum;
	}

	/** Sets the number of "#" placeholder links. */
	public void setEmptyLinkNum(int emptyLinkNum) {
		this.emptyLinkNum = emptyLinkNum;
	}

	
	/** Returns whether ICP filing information was found. */
	public boolean getIsbeian() {
		return isbeian;
	}

	/** Sets whether ICP filing information was found. */
	public void setIsbeian(boolean isbeian) {
		this.isbeian = isbeian;
	}

	/**
	 * Collects every anchor href of the page into {@code links}.
	 * <p>
	 * An href that is exactly "#" (after trimming) only increments
	 * {@code emptyLinkNum}. Every other href is canonicalized against
	 * {@code url}; a canonical URL that is already present increments
	 * {@code sameLinkNum}.
	 *
	 * @param h   parsed page; queried through a clone
	 * @param url URL of the page, used as the base for canonicalization
	 * @throws CloneNotSupportedException if the document cannot be cloned
	 * @author zhangke
	 */
	public void setLinks(Html h,String url) throws CloneNotSupportedException{
		// The href values come back as one string joined with "^|".
		String joined = ((Html)h.clone()).$("a").attr("href");
		if(joined == null){
			// no anchor / no href attribute: nothing to collect
			return;
		}
		for(String hf : joined.split("\\^\\|")){
			if(hf.trim().equals("#")){
				emptyLinkNum++;
				continue;
			}
			if(!this.links.add(UrlUtil.canonicalizeUrl(hf, url))){
				sameLinkNum++;
			}
		}
	}
	
	
	/**
	 * Splits {@code links} into own links and friend links.
	 * <p>
	 * A link is an own link when its URL contains {@code domain}; every other
	 * link is a friend link. When the domain could not be determined
	 * ({@code domain == null}) no link can be recognized as own, so all links
	 * are filed as friend links instead of failing with a NullPointerException.
	 * Null entries are skipped.
	 *
	 * @author zhangke
	 */
	public void classifylinks(){
		for(String link:this.links){
			if(link == null){
				continue;
			}
			boolean own = this.domain != null && link.indexOf(this.domain) != -1;
			if(own){
				this.ownlinks.add(link);
			}else{
				this.flinks.add(link);
			}
		}
	}
	
	/**
	 * Advice on the number of links on the home page.
	 * <p>
	 * Based on the number of own links: fewer than 20 or more than 160 scores 1,
	 * anything in between scores 2.
	 *
	 * @author wangzeng
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> linkNumberAnalyze() {
		final int ownCount = ownlinks.size();
		final String verdict;
		final int points;
		if(ownCount > 160){
			verdict = "首页超链接太多,不利于排名 ";
			points = 1;
		}else if(ownCount < 20){
			verdict = "首页超链接非常少,不利于排名 ";
			points = 1;
		}else{
			verdict = "首页超链接数量适中 ";
			points = 2;
		}

		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "首页链接数量建议");
		result.put("msg", verdict);
		result.put("score", points);
		return result;
	}

	/**
	 * Advice on the home page keyword density.
	 * <p>
	 * The density is the length of the keywords string divided by the length of
	 * the page text, rounded to two decimals. Below 0.01 or above 0.1 scores 0,
	 * otherwise 2. Missing keywords or an empty page text count as density 0
	 * instead of raising NullPointerException / dividing by zero.
	 *
	 * @author wangzeng
	 * @return result map with the keys "item", "msg" and "score"
	 * @throws CloneNotSupportedException if the document cannot be cloned
	 */
	public Map<String, Object> keyNumAnalyze() throws CloneNotSupportedException {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "首页关键词密度建议");
		map.put("msg", "");
		map.put("score", 0);

		Html page = new Html(html);
		String allText = ((Html)page.clone()).text();

		double density = 0;
		if(keywords != null && allText != null && allText.length() > 0){
			density = (double)keywords.length() / allText.length();
		}
		// Round to two decimals numerically; formatting to text and parsing it
		// back breaks in locales that use a decimal comma.
		density = Math.rint(density * 100.0) / 100.0;

		if(density < 0.01){
			map.put("msg", "首页关键词密度太低");
			map.put("score", 0);
		}else if(density > 0.1){
			map.put("msg", "首页关键词密度太高");
			map.put("score", 0);
		}else{
			map.put("msg", "密度不是绝对的，视行业调整，做到自然");
			map.put("score", 2);
		}
		return map;
	}

	/**
	 * Advice on keywords inside home page links.
	 * <p>
	 * Scores 0 as soon as one anchor text is shorter than 3 or longer than 30
	 * characters, otherwise 2.
	 *
	 * @author wangzeng
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> linkKeyAnalyze() {
		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "首页链接中关键词建议");

		List<Element> anchors = new Html(html).$("a").allele();
		boolean badLength = false;
		for(Element anchor : anchors){
			int textLength = anchor.text().length();
			if(textLength < 3 || textLength > 30){
				badLength = true;
				break;
			}
		}

		if(badLength){
			result.put("msg", "超链接核心关键词长度不适，不利于优化");
			result.put("score", 0);
		}else{
			result.put("msg", "超链接优化的不错");
			result.put("score", 2);
		}
		return result;
	}

	/**
	 * Advice on the number of friend links.
	 * <p>
	 * 10 to 20 friend links score 2; fewer or more score 0.
	 *
	 * @author wangzeng
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> friendLinkAnalyze() {
		final int friendCount = flinks.size();
		final String advice;
		final int points;
		if(friendCount > 20){
			advice = "建议减少至10个-20个左右";
			points = 0;
		}else if(friendCount < 10){
			advice = "建议增加到10个-20个左右";
			points = 0;
		}else{
			advice = "友情链接优化的不错";
			points = 2;
		}

		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "友情链接优化建议");
		result.put("msg", advice);
		result.put("score", points);
		return result;
	}

	/**
	 * Advice on how fast friend links are added or removed.
	 * <p>
	 * Placeholder: no check is performed; the result always carries an empty
	 * item, an empty message and a score of 0.
	 *
	 * @author wangzeng
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> flUpdateAnalyze() {
		Map<String, Object> placeholder = new HashMap<String, Object>();
		placeholder.put("score", 0);
		placeholder.put("msg", "");
		placeholder.put("item", "");
		return placeholder;
	}

	/**
	 * Checks that the site adds or changes at least one URL per week.
	 * <p>
	 * Heuristic: the page counts as freshly updated when its source contains
	 * today's date written as MM-dd or MM/dd. The yyyy-MM-dd and yyyy/MM/dd
	 * forms end with those strings, so they are covered as well. Both forms
	 * are derived from one timestamp so they cannot disagree around midnight.
	 * A missing page source counts as "not updated".
	 *
	 * @author wangzeng
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> urlUpdateAnalyze() {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "网站至少1周增加一个新URl或者修改一个URl的内容");
		map.put("msg", "");
		map.put("score", 0);

		String monthDay = new SimpleDateFormat("MM-dd").format(new Date());
		boolean updatedToday = html != null
				&& (html.contains(monthDay) || html.contains(monthDay.replace("-", "/")));

		if(updatedToday){
			map.put("msg", "更新速度非常好");
			map.put("score", 2);
		}else{
			map.put("msg", "更新速度较慢");
			map.put("score", 1);
		}

		return map;
	}
	
	/**
	 * Extracts the registrable domain (for example {@code example.com}) from a
	 * URL and stores it in {@code domain}.
	 * <p>
	 * Both {@code http://} and {@code https://} URLs are accepted. Only the
	 * suffixes com, cn, net, org, biz, info, cc and tv are recognized, and the
	 * suffix must end the label, so {@code www.cnblogs.com} yields
	 * {@code cnblogs.com} rather than {@code www.cn}. The match never crosses a
	 * {@code /}, so dots in the path are not mistaken for the host.
	 *
	 * @param mainurl URL to inspect; may be null
	 * @return the domain, or null when none is found (in that case
	 *         {@code domain} keeps its previous value)
	 */
	public String getDomain(String mainurl) {
		if (mainurl == null) {
			return null;
		}
		String reg = "(?<=https?\\://[a-zA-Z0-9]{0,100}[.]{0,1})[^./\\s]*?\\.(com|cn|net|org|biz|info|cc|tv)(?![a-zA-Z0-9-])";
		Matcher m = Pattern.compile(reg, Pattern.CASE_INSENSITIVE).matcher(mainurl);
		if (m.find()) {
			this.domain = m.group(0);
			return this.domain;
		}
		return null;
	}

	/**
	 * Home page title check.
	 * <p>
	 * Rules: the title must exist, should not be longer than 40 characters and
	 * should not split into more than 4 segments on the separators
	 * {@code , . | - _ `}. The score starts at 2 and drops by one per violated
	 * rule; a missing title scores 0.
	 *
	 * @return result map with the keys "item", "msg" and "score"
	 * @author zhangke
	 */
	public Map<String, Object> titleAnalyze() {
		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "首页标题检查");

		if(title == null || "".equals(title)){
			result.put("msg", "标题不存在,不利于优化 ");
			result.put("score", 0);
			return result;
		}

		StringBuilder remarks = new StringBuilder();
		int points = 2;
		if(title.length() > 40){
			remarks.append("标题过长,不利于体现关键词重要度   ");
			points--;
		}
		if(title.split("[,\\.\\|\\-\\_\\`]").length > 4){
			remarks.append("标题中有堆砌关键词嫌疑 ");
			points--;
		}

		result.put("msg", points == 2 ? "标题设计优秀" : remarks.toString());
		result.put("score", points);
		return result;
	}

	/**
	 * Home page description check.
	 * <p>
	 * Rules: the description must exist, should not be longer than 75
	 * characters and should not split into more than 4 segments. The score
	 * starts at 2 and drops by one per violated rule; a missing description
	 * scores 0.
	 *
	 * @return result map with the keys "item", "msg" and "score"
	 * @author zhangke
	 */
	public Map<String, Object> depictAnalyze() {
		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "首页描述检查");

		if(description == null || "".equals(description)){
			result.put("msg", "没有描述,不利于优化 ");
			result.put("score", 0);
			return result;
		}

		StringBuilder remarks = new StringBuilder();
		int points = 2;
		if(description.length() > 75){
			remarks.append("描述过长,不利于体现关键词重要度  ");
			points--;
		}
		if(description.split("[,\\.\\|\\-\\_\\`\\、]").length > 4){
			remarks.append("描述中有堆砌关键词嫌疑  ");
			points--;
		}

		result.put("msg", points == 2 ? "描述设计优秀" : remarks.toString());
		result.put("score", points);
		return result;
	}

	/**
	 * Home page keywords check.
	 * <p>
	 * Missing keywords score 0. Keywords longer than 75 characters, or
	 * splitting into fewer than 4 segments, score 1. Everything else scores 2.
	 *
	 * @return result map with the keys "item", "msg" and "score"
	 * @author zhangke
	 */
	public Map<String, Object> skeyAnalyze() {
		final String verdict;
		final int points;
		if(keywords == null || "".equals(keywords)){
			verdict = "没有关键词,不利于优化 ";
			points = 0;
		}else if(keywords.length() > 75){
			verdict = "关键词过长,不利于体现关键词重要度   ";
			points = 1;
		}else if(keywords.split("[,\\.\\|\\-\\_\\`\\、]").length < 4){
			verdict = "关键词过少,不利于多个词排名  ";
			points = 1;
		}else{
			verdict = "关键词设计合理";
			points = 2;
		}

		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "首页关键字检查");
		result.put("msg", verdict);
		result.put("score", points);
		return result;
	}

	/**
	 * Home page term check: do the optimization keywords match the content?
	 * <p>
	 * Splits the title and the keywords into terms and counts how often those
	 * terms occur in the body text. No occurrence scores 0, fewer than 6
	 * score 1, six or more score 2.
	 *
	 * @return result map with the keys "item", "msg" and "score"
	 * @throws CloneNotSupportedException if the document cannot be cloned
	 * @author zhangke
	 */
	public Map<String, Object> participleAnalyze() throws CloneNotSupportedException {
		Html page = new Html(this.html);
		String bodyText = ((Html)page.clone()).$("body").text();

		int hits = 0;
		String[] sources = new String[]{ title, keywords };
		for(String source : sources){
			if(source == null || source.equals("")){
				continue;
			}
			for(String term : source.split("[,\\.\\|\\-\\_\\`\\、]")){
				hits += keyCount(bodyText, term);
			}
		}

		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "首页分词检查,有话关键词与内容关键词是否一致");
		if(hits == 0){
			result.put("msg", "优化方向与内容完全无关,非常不利于优化");
			result.put("score", 0);
		}else if(hits < 6){
			result.put("msg", "优化方向与内容相关度低,不利于关键词排名");
			result.put("score", 1);
		}else{
			result.put("msg", "关键词设计良好");
			result.put("score", 2);
		}
		return result;
	}
	
	/**
	 * Counts how many times {@code key} occurs in {@code h}.
	 * <p>
	 * Occurrences may overlap: the search resumes one character after the start
	 * of the previous hit, so "aa" occurs twice in "aaa". The text is scanned
	 * in place with {@link String#indexOf(String, int)} instead of creating a
	 * new substring for every hit.
	 *
	 * @param h   text to search; null or empty yields 0
	 * @param key term to look for; null or empty yields 0
	 * @return number of (possibly overlapping) occurrences
	 * @author zhangke
	 */
	public int keyCount(String h,String key){
		if(h==null||h.equals(""))return 0;
		if(key==null||key.equals(""))return 0;
		int count=0;
		int from = h.indexOf(key);
		while(from != -1){
			count++;
			from = h.indexOf(key, from + 1);
		}
		return count;
	}

	/**
	 * Checks the number of H1 elements.
	 * <p>
	 * Rule: there should be exactly one. No H1 scores 1, more than one
	 * scores 0, exactly one scores 2. A selection that exists but holds no
	 * element is treated as "no H1"; previously only a null selection was.
	 *
	 * @return result map with the keys "item", "msg" and "score"
	 * @author zhangke
	 */
	public Map<String, Object> hNumAnalyze() {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "是否存在多个H1");
		map.put("msg", "H1标签合理");
		map.put("score", 2);

		Html h1 = (Html) new Html(this.html).$("h1");
		int h1Count = 0;
		if(h1 != null && h1.getElements() != null){
			h1Count = h1.getElements().size();
		}

		if(h1Count == 0){
			map.put("msg", "未发现H1标签,建议添加一个");
			map.put("score", 1);
		}else if(h1Count > 1){
			map.put("msg", "发现"+h1Count+"个H1标签,建议减少"+(h1Count-1)+"个" );
			map.put("score", 0);
		}

		return map;
	}

	/**
	 * Checks whether the page may trigger the B/Strong abuse rule.
	 * <p>
	 * Counts {@code <b>} and {@code <strong>} elements together: more than 2
	 * scores 0, 1 or 2 score 1, none scores 2.
	 *
	 * @return result map with the keys "item", "msg" and "score"
	 * @throws CloneNotSupportedException if the document cannot be cloned
	 * @author zhangke
	 */
	public Map<String, Object> isBSAnalyze() throws CloneNotSupportedException {
		Html page = new Html(this.html);
		int boldCount = ((Html)page.clone()).$("b").allele().size();
		int strongCount = ((Html)page.clone()).$("strong").allele().size();
		int emphasized = boldCount + strongCount;

		final String verdict;
		final int points;
		if(emphasized <= 0){
			verdict = "检查正常";
			points = 2;
		}else if(emphasized <= 2){
			verdict = "有触发作弊风险";
			points = 1;
		}else{
			verdict = "B/Strong作弊风险很高";
			points = 0;
		}

		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "是否可能触发滥用 B/Strong 反作弊规则");
		result.put("msg", verdict);
		result.put("score", points);
		return result;
	}

	/**
	 * Checks whether the page may trigger the "#" link low-quality rule.
	 * <p>
	 * More than 3 links with {@code href="#"} score 0, otherwise 2.
	 *
	 * @return result map with the keys "item", "msg" and "score"
	 * @author zhangke
	 */
	public Map<String, Object> isNlinkAnalyze() {
		boolean tooManyPlaceholders = emptyLinkNum > 3;

		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "是否可能触发滥用 # 链接低质量站点规则");
		result.put("msg", tooManyPlaceholders ? "#空链接超标,有风险" : "检查正常");
		result.put("score", tooManyPlaceholders ? 0 : 2);
		return result;
	}

	/**
	 * Checks whether the page may trigger the duplicate-link abuse rule.
	 * <p>
	 * Based on the number of duplicate links counted while the links were
	 * collected: more than 3 score 0, 1 to 3 score 1, none scores 2.
	 *
	 * @author wangxu
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> isRepeatLinkAnalyze() {
		final String verdict;
		final int points;
		if(sameLinkNum <= 0){
			verdict = "无重复链接作弊风险";
			points = 2;
		}else if(sameLinkNum <= 3){
			verdict = "有重复链接作弊风险";
			points = 1;
		}else{
			verdict = "重复链接过多,可能会触发降权";
			points = 0;
		}

		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "是否可能触发滥用重复链接反作弊规则  ");
		result.put("msg", verdict);
		result.put("score", points);
		return result;
	}

	/**
	 * Checks whether the page may trigger the duplicate outbound-link rule.
	 * <p>
	 * Looks at the raw href values that neither contain the site domain nor
	 * "javascript" and counts every repetition of an already seen value. More
	 * than 3 repetitions score 0, otherwise 2. When the domain is unknown no
	 * href can be recognized as internal, so all of them are treated as
	 * outbound. A page without hrefs has no repetitions.
	 *
	 * @author wangxu
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> isToRepeatlinkTradeAnalyze() {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "是否可能触发导出重复链接反作弊规则");
		map.put("msg", "无风险");
		map.put("score", 2);

		String joined = new Html(this.html).$("a").attr("href");
		int repeats = 0;
		if (joined != null) {
			// A set gives the "seen before" answer without rescanning a list.
			Set<String> seen = new HashSet<String>();
			for (String link : joined.split("\\^\\|")) {
				boolean internal = this.domain != null && link.indexOf(this.domain) >= 0;
				if (internal || link.indexOf("javascript") >= 0) {
					continue;
				}
				if (!seen.add(link)) {
					repeats++;
				}
			}
		}

		if (repeats >3) {
			map.put("msg", "重复链接较多，请优化");
			map.put("score", 0);
		}

		return map;
	}

	/**
	 * Checks whether the page may trigger the ALT keyword-stuffing rule.
	 * <p>
	 * Counts the images with a non-empty {@code alt} attribute; more than 3
	 * score 0, otherwise 2.
	 * <p>
	 * The alt values are split directly. Earlier code first appended the
	 * regex text {@code \^\|} to the joined value, which is not the
	 * {@code ^|} delimiter: it made the last entry always non-empty and
	 * turned a missing value into the text "null", inflating the count by one.
	 *
	 * @author wangxu
	 * @return result map with the keys "item", "msg" and "score"
	 * @throws CloneNotSupportedException if the document cannot be cloned
	 */
	public Map<String, Object> isAltMuchAnalyze() throws CloneNotSupportedException {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "是否可能触发ALT堆砌关键词反作弊规则");
		map.put("msg", "检查正常");
		map.put("score", 2);

		Html h = new Html(this.html);
		String joined = ((Html)h.clone()).$("img").attr("alt");

		int filled = 0;
		if (joined != null) {
			for (String alt : joined.split("\\^\\|")) {
				if (!alt.equals("")) {
					filled++;
				}
			}
		}
		if (filled>3) {
			map.put("msg", "可能触发ALT堆砌关键词反作弊规则");
			map.put("score", 0);
		}

		return map;
	}

	/**
	 * Checks whether the page text may trigger the keyword-stuffing rule.
	 * <p>
	 * The ratio is (occurrences of the keywords string * its length) / text
	 * length. Above 0.08 or below 0.03 scores 0, otherwise 2.
	 * <p>
	 * The keywords string is matched literally (it used to be compiled as a
	 * regular expression, so characters such as {@code +} or {@code (} changed
	 * the match or threw) and trailing occurrences are counted. Missing
	 * keywords or an empty text give a ratio of 0.
	 *
	 * @author wangxu
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> isTextKeyAnalyze() {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "是否可能触发文本内容堆砌关键词反作弊规则");
		map.put("msg", "文本内容关键词密度有待优化");
		map.put("score", 0);

		String text = new Html(this.html).text();
		double scale = 0;
		if (keywords != null && keywords.length() > 0 && text != null && text.length() > 0) {
			// limit -1 keeps trailing empty parts, so a hit at the very end is counted
			int occurrences = text.split(Pattern.quote(keywords), -1).length - 1;
			scale = ((double)occurrences * keywords.length()) / ((double)text.length());
		}

		if (scale>0.08) {
			map.put("msg", "文本内容关键词密度过密");
			map.put("score", 0);
		}else if (scale <0.03) {
			map.put("msg", "文本内容关键词密度过低");
			map.put("score", 0);
		} else{
			map.put("msg", "文本内容关键词密度良好");
			map.put("score", 2);
		}
		return map;
	}

	/**
	 * Checks whether {@code title} attributes may trigger the keyword-stuffing
	 * rule.
	 * <p>
	 * Inspects the title attributes of img, a and div elements. As soon as
	 * one of them splits into more than 4 segments the check scores 0,
	 * otherwise 2.
	 * <p>
	 * Each element type is processed on its own. Earlier code concatenated the
	 * three joined values with the regex text {@code \^\|}, which glued the
	 * last title of one group to the first title of the next and added a
	 * {@code |} that itself counts as a separator.
	 *
	 * @author zhangke
	 * @return result map with the keys "item", "msg" and "score"
	 * @throws CloneNotSupportedException if the document cannot be cloned
	 */
	public Map<String, Object> isTitleKeyAnalyze() throws CloneNotSupportedException {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "是否可能触发TITLE属性中堆砌关键词反作弊规则");
		map.put("msg", "检查正常");
		map.put("score", 2);

		Html h = new Html(html);
		String[] joinedTitles = new String[]{
			((Html)h.clone()).$("img").attr("title"),
			((Html)h.clone()).$("a").attr("title"),
			((Html)h.clone()).$("div").attr("title")
		};
		for(String joined : joinedTitles){
			if(joined == null){
				continue;
			}
			for(String t : joined.split("\\^\\|")){
				if(t.split("[,\\.\\|\\-\\_\\`\\、]").length > 4){
					map.put("msg", "可能触发堆砌降权");
					map.put("score", 0);
					return map;
				}
			}
		}
		return map;
	}

	/**
	 * Checks whether the page may trigger the hidden-link (black link) rule.
	 * <p>
	 * Counts anchors inside divs whose style contains {@code display:none}:
	 * more than 2 score 0, 1 or 2 score 1, none scores 2.
	 *
	 * @author zhangke
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> isDisLinkAnalyze() {
		int hiddenAnchors = new Html(html).$("div[style*=display:none]").$("a").allele().size();

		final String verdict;
		final int points;
		if(hiddenAnchors <= 0){
			verdict = "检查正常";
			points = 2;
		}else if(hiddenAnchors <= 2){
			verdict = "检查到少量隐藏外链";
			points = 1;
		}else{
			verdict = "检查到隐藏外链,有风险";
			points = 0;
		}

		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "是否可能触发隐藏外链,反黑链检查作弊规则");
		result.put("msg", verdict);
		result.put("score", points);
		return result;
	}

	/**
	 * Checks whether the page may trigger the keyword-density rule.
	 * <p>
	 * Splits title, keywords and description into distinct terms, counts their
	 * occurrences in the body text and divides by the text length. Above 0.1
	 * scores 0, above 0.05 scores 1, otherwise 2.
	 * <p>
	 * Missing fields are skipped; concatenating them used to add the literal
	 * term "null" to the search.
	 *
	 * @author zhangke
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> isKeyMuchAnalyze() {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "是否可能触发关键词密度过大反作弊规则");
		map.put("msg", "内容关键词密度设置合理");
		map.put("score", 2);

		Html h = new Html(html);
		String text = h.$("body").text();

		Set<String> keys = new HashSet<String>();
		String[] sources = new String[]{ title, keywords, description };
		for(String source : sources){
			if(source == null){
				continue;
			}
			for(String k : source.split("[,\\.\\|\\-\\_\\`\\、]")){
				keys.add(k);
			}
		}

		double n = 0;
		for(String key:keys){
			// keyCount returns 0 for empty terms and for an empty text
			n += keyCount(text, key);
		}
		double tlength = text == null ? 0 : text.length();
		// With an empty text n is 0 and the ratio is NaN, which fails both comparisons.
		double ratio = n/tlength;
		if(ratio > 0.1){
			map.put("msg", "密度超标,极易触发关键词堆砌降权");
			map.put("score", 0);
		}else if(ratio > 0.05){
			map.put("msg", "内容关键词密度偏高");
			map.put("score", 1);
		}
		return map;
	}

	/**
	 * Checks whether the page may trigger the excessive-link rule.
	 * <p>
	 * More than 160 own links score 0, otherwise 2.
	 *
	 * @author panmg
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> isLinkMuchAnalyze() {
		boolean excessive = ownlinks.size() > 160;

		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "是否可能触发内页过度导出链接反作弊规则");
		result.put("msg", excessive ? "内页导出链接数过多" : "内页导出链接数正常");
		result.put("score", excessive ? 0 : 2);
		return result;
	}

	/**
	 * Checks whether the page may trigger the hidden-keyword rule.
	 * <p>
	 * Counts divs with class {@code hide} or {@code hidden}, and divs whose
	 * style contains {@code display:none} or {@code visibility:hidden}. More
	 * than 3 score 0, otherwise 2.
	 * <p>
	 * The last selector previously looked for {@code visibility:none}, which is
	 * not a CSS value and therefore never hides anything.
	 *
	 * @author panmg
	 * @return result map with the keys "item", "msg" and "score"
	 * @throws CloneNotSupportedException if the document cannot be cloned
	 */
	public Map<String, Object> isDisKeyAnalyze() throws CloneNotSupportedException {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "是否可能触发隐藏关键词反作弊规则");
		Html h = new Html(html);
		int i = ((Selectable) h.clone()).$("div.hide").allele().size();
		i += ((Selectable) h.clone()).$("div.hidden").allele().size();
		i += ((Selectable) h.clone()).$("div[style*=display:none]").allele().size();
		i += ((Selectable) h.clone()).$("div[style*=visibility:hidden]").allele().size();
		if(i>3){
			map.put("msg", "出现违反隐藏关键词规则");
			map.put("score", 0);
		}else{
			map.put("msg", "检查正常");
			map.put("score", 2);
		}
		return map;
	}

	/**
	 * Checks whether the richness of internal links helps optimization.
	 * <p>
	 * Fetches 5 randomly chosen own links (the same link may be drawn more than
	 * once) and sums the number of anchors on those pages. More than 900 scores
	 * 1 (too many), 211 to 900 scores 2, 210 or fewer scores 1 (too few).
	 * <p>
	 * A total of exactly 900 used to fall into the "too few" branch. Pages
	 * that cannot be fetched contribute no anchors, and a site without own
	 * links is rated "too few" instead of failing while drawing a random link.
	 *
	 * @author panmg
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> isEasyToOptimizeAnalyze() {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "内部链接丰富度是否有利于优化");

		final int samples = 5;
		int linknums = 0;
		if(!ownlinks.isEmpty()){
			for(int i = 0; i < samples; i++){
				String htmlres = getHtml(getRandomUrl(ownlinks), charset);
				if(htmlres == null){
					// fetch failed; getHtml reports that as null
					continue;
				}
				linknums += new Html(htmlres).$("a").allele().size();
			}
		}

		if(linknums > 900){
			map.put("msg", "内部链接过多不利于优化");
			map.put("score", 1);
		}else if(linknums > 210){
			map.put("msg", "内部链接数量适中,优");
			map.put("score", 2);
		}else{
			map.put("msg", "内部链接过少不利于搜索优化");
			map.put("score", 1);
		}
		return map;
	}

	/**
	 * Checks whether inner pages repeat the home page title, keywords and
	 * description.
	 * <p>
	 * Splits the three home page fields into terms, fetches 5 randomly chosen
	 * own links and counts, per page, the terms contained in the page text.
	 * More than 80 hits score 0, otherwise 2.
	 * <p>
	 * Missing fields and empty terms are ignored: a missing field used to
	 * become the term "null", and an empty term matches every text. Pages that
	 * cannot be fetched are skipped, and a site without own links yields no
	 * hits.
	 *
	 * @author panmg
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> isSameAnalyze()  {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "内页是否与首页标题/关键词/描述相同");

		List<String> keys = new ArrayList<String>();
		String[] sources = new String[]{ title, keywords, description };
		for(String source : sources){
			if(source == null){
				continue;
			}
			for(String key : source.split("[,\\.\\|\\-\\_\\`]")){
				if(key.length() > 0){
					keys.add(key);
				}
			}
		}

		final int samples = 5;
		int cnt = 0;
		if(!ownlinks.isEmpty()){
			sampling:
			for(int page = 0; page < samples; page++){
				String htmlres = getHtml(getRandomUrl(ownlinks), charset);
				if(htmlres == null){
					continue;
				}
				String text = new Html(htmlres).text();
				if(text == null){
					continue;
				}
				for(String key : keys){
					if(text.contains(key)){
						cnt++;
						if(cnt > 80){
							// threshold reached, no need to fetch more pages
							break sampling;
						}
					}
				}
			}
		}

		if(cnt>80){
			map.put("msg", "存在大量的关键词, 标题, 描述重复");
			map.put("score", 0);
		}else{
			map.put("msg", "关键字,标题, 描述优");
			map.put("score", 2);
		}
		return map;

	}

	/**
	 * Checks whether the structure of inner pages is too simple.
	 * <p>
	 * Fetches 5 randomly chosen own links and sums, per page, the number of
	 * {@code body>div} elements plus the divs selected inside them. More than
	 * 500 scores 1 (too complex), 276 to 500 scores 2, 275 or fewer scores 1
	 * (too simple).
	 * <p>
	 * The middle condition used to be {@code divnums>275 || divnums<425},
	 * which is always true and made the "too simple" result unreachable.
	 * Totals up to 500 keep their previous "good" rating.
	 * NOTE(review): the original 425 bound hints that 425..500 was meant to be
	 * rated differently - confirm the intended range.
	 * Pages that cannot be fetched are skipped, and a site without own links
	 * is rated "too simple".
	 *
	 * @author panmg
	 * @return result map with the keys "item", "msg" and "score"
	 * @throws CloneNotSupportedException if the document cannot be cloned
	 */
	public Map<String, Object> isEasyAnalyze() throws CloneNotSupportedException  {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "内页是否结构过于简单");

		final int samples = 5;
		int divnums = 0;
		if(!ownlinks.isEmpty()){
			for(int i = 0; i < samples; i++){
				String htmlres = getHtml(getRandomUrl(ownlinks), charset);
				if(htmlres == null){
					continue;
				}
				Html topDivs = (Html) new Html(htmlres).$("body>div");
				divnums += topDivs.allele().size();
				divnums += ((AbstractSelectable) topDivs.clone()).$("div").allele().size();
			}
		}

		if(divnums>500){
			map.put("msg", "页面过于复杂");
			map.put("score", 1);
		}else if(divnums>275){
			map.put("msg", "页面复杂度优");
			map.put("score", 2);
		}else{
			map.put("msg", "页面过于简单");
			map.put("score", 1);
		}
		return map;
	}

	/**
	 * Inner page similarity check.
	 * <p>
	 * Fetches 5 randomly chosen own links and collects, per page, the class
	 * value and the id value reported for the {@code body>div} selection. The
	 * rating depends on how many distinct values were seen: more than 7 scores
	 * 1, more than 4 scores 2, otherwise 1 (too similar).
	 * NOTE(review): only one class value and one id value are added per page,
	 * so with 5 samples neither set can exceed 5 entries and the "more than 7"
	 * branch cannot be reached - confirm whether the joined attribute values
	 * were meant to be split first.
	 * <p>
	 * Pages that cannot be fetched are skipped, and a site without own links
	 * is rated "too similar" instead of failing while drawing a random link.
	 *
	 * @author panmg
	 * @return result map with the keys "item", "msg" and "score"
	 * @throws CloneNotSupportedException if the document cannot be cloned
	 */
	public Map<String, Object> sameAnalyze() throws CloneNotSupportedException  {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "内页页面相似度检查");
		Set<String> classnames = new HashSet<String>();
		Set<String> ids = new HashSet<String>();

		final int samples = 5;
		if(!ownlinks.isEmpty()){
			for(int i = 0; i < samples; i++){
				String htmlres = getHtml(getRandomUrl(ownlinks), charset);
				if(htmlres == null){
					continue;
				}
				Html topDivs = (Html) new Html(htmlres).$("body>div");
				classnames.add(((Selectable) topDivs.clone()).attr("class"));
				ids.add(((Selectable) topDivs.clone()).attr("id"));
			}
		}

		if(classnames.size()>7 || ids.size()>7){
			map.put("msg", "相似度较差, 建议优化");
			map.put("score", 1);
		}else if(classnames.size()>4 || ids.size()>4){
			map.put("msg", "相似度优");
			map.put("score", 2);
		}else{
			map.put("msg", "相似度过高");
			map.put("score", 1);
		}
		return map;
	}

	/**
	 * Checks whether the target keywords of inner pages match the direction of
	 * the home page. The heuristic is crude and should be refined.
	 * <p>
	 * Splits the home page keywords into terms, fetches 5 randomly chosen own
	 * links and counts, per page, the terms contained in that page's title,
	 * keywords or description. More than 80 hits are reported as too much
	 * repetition.
	 * <p>
	 * The scores now agree with the messages: the "poor" result scores 0 and
	 * the "good" result scores 2 (they used to be swapped). Missing home page
	 * keywords, missing page meta data, empty terms and pages that cannot be
	 * fetched are skipped instead of raising NullPointerException.
	 *
	 * @author panmg
	 * @return result map with the keys "item", "msg" and "score"
	 * @throws CloneNotSupportedException if the document cannot be cloned
	 */
	public Map<String, Object> accordanceAnalyze() throws CloneNotSupportedException  {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "内页优化目标关键词是否与首页优化方向一致");

		String[] keys = keywords == null ? new String[0] : keywords.split("[,\\.\\|\\-\\_\\`]");
		final int samples = 5;
		int cnt = 0;
		if(keys.length > 0 && !ownlinks.isEmpty()){
			sampling:
			for(int page = 0; page < samples; page++){
				String htmlres = getHtml(getRandomUrl(ownlinks), charset);
				if(htmlres == null){
					continue;
				}
				Html h = new Html(htmlres);
				String pageTitle = ((Html)h.clone()).$("title").text();
				String pageKeywords = ((Html)h.clone()).$("[name=keywords]").attr("content");
				String pageDescription = ((Html)h.clone()).$("[name=description]").attr("content");
				for(String key : keys){
					if(key.length() == 0){
						// an empty term would match every page
						continue;
					}
					boolean hit = (pageTitle != null && pageTitle.contains(key))
							|| (pageKeywords != null && pageKeywords.contains(key))
							|| (pageDescription != null && pageDescription.contains(key));
					if(hit){
						cnt++;
						if(cnt > 80){
							break sampling;
						}
					}
				}
			}
		}

		if(cnt > 80){
			map.put("msg", "关键字方向较差,太多相同的标题和描述");
			map.put("score", 0);
		}else{
			map.put("msg", "关键字方向很优, 基本没有重复");
			map.put("score", 2);
		}
		return map;
	}

	/**
	 * Friend link check.
	 * <p>
	 * More than 10 friend links are rated good without further checks. With 10
	 * or fewer, every friend link is requested and the rating is good only
	 * when all of them answer with HTTP 200. Good scores 1, bad scores 0.
	 * <p>
	 * Exactly 10 friend links used to be rated bad without being requested at
	 * all, because the probing loop only ran for fewer than 10.
	 *
	 * @author huanyy
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> flinkAnalyze() {
		Map<String, Object> map = new HashMap<String, Object>();
		int score=0;
		int len=flinks.size();
		String msg="差";
		if(len>10){
			score=1;
			msg="好";
		}else{
			// number of friend links that answered with 200
			int count=0;
			for (String url : flinks) {
				try {
					if(HttpUtil.request(url, charset).getResponseCode()==200){
						count++;
					}
				} catch (Exception e) {
					System.out.println(url+"loaderror");
				}
			}
			if(count==len){
				score=1;
				msg="好";
			}
		}
		map.put("item", "友情链接检查");
		map.put("msg", "友情链接检查"+msg);
		map.put("score", score);
		return map;
	}



	/**
	 * Robots check.
	 * <p>
	 * Requests {@code /robots.txt} from the root of the current site. A 404
	 * answer scores 0 with the advice to add the file; every other outcome,
	 * including a failed request, keeps the default score of 2.
	 * NOTE(review): the item text asks whether robots blocks the whole site,
	 * but the file content is not inspected - only its existence.
	 * <p>
	 * The robots URL is resolved against the current URL so that a trailing
	 * slash or a path in it no longer produces a wrong location. When the
	 * current URL cannot be parsed the plain concatenation is used as before.
	 *
	 * @author wangxu
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> robotsAnalyze()  {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "是否ROBOTS屏蔽全站");
		map.put("msg", "检测正常");
		map.put("score", 2);

		String robotsUrl;
		try{
			robotsUrl = new URL(new URL(this.cururl), "/robots.txt").toString();
		}catch(Exception e){
			robotsUrl = this.cururl+"/robots.txt";
		}

		int responseCode = 0;
		try{
			responseCode = HttpUtil.request(robotsUrl, charset).getResponseCode();
		}catch(Exception ignored){
			// best effort: an unreachable site is not reported as "robots.txt missing"
		}
		if (responseCode == 404) {
			map.put("msg", "未检测到robots.txt，建议添加一个");
			map.put("score", 0);
		}
		return map;
	}

	/**
	 * Checks whether the site is easy to mirror.
	 * <p>
	 * Three indicators are counted: ICP filing information was found (this
	 * calls {@link #recordAnalyze()}), at least one own link contains 4 or more
	 * slashes (multi-level directory), and the head contains a base element.
	 * Two or more indicators mean "no anomaly".
	 * <p>
	 * The score is the indicator count capped at 2; all three indicators used
	 * to produce a score of 3. The base check looks for {@code <base} instead
	 * of the bare word "base", and an unknown domain or an empty head no longer
	 * raises NullPointerException.
	 *
	 * @author huanyy
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> mirrorAnalyze() {
		int count=0;
		String msg="";
		Html h=new Html(html);
		Map<String, Object> map = new HashMap<String, Object>();
		// indicator 1: filing information exists
		try {
			recordAnalyze();
			if(getIsbeian())  count++;
		} catch (Exception e) {
			e.printStackTrace();
		}
		// indicator 2: an own link with a multi-level directory
		if(domain != null){
			for (String link : links) {
				if(link == null || link.indexOf(domain) == -1){
					continue;
				}
				int slashes=0;
				for (int i = 0; i < link.length(); i++) {
					if(link.charAt(i)=='/')  slashes++;
				}
				if(slashes>=4){
					count++;
					break;
				}
			}
		}
		// indicator 3: a base element in the head
		String head = h.$("head").html();
		if(head != null && head.toLowerCase().indexOf("<base")!=-1) count++;

		if(count>=2) msg="未发现异常";
		else msg="网站需整改，容易被镜像";
		map.put("item", "是否可能被解析镜像");
		map.put("msg", msg);
		map.put("score", Math.min(count, 2));
		return map;
	}

	/**
	 * Checks whether a slow page load may cause a ranking penalty.
	 * <p>
	 * Times one request of the current URL. A successful (HTTP 200) load under
	 * 3 seconds scores 2, under 6 seconds scores 1, anything slower scores 0.
	 * <p>
	 * A failed request or a non-200 answer scores 0. It used to leave the
	 * measured time at 0 ms and therefore earned the best score. The message
	 * still reports 0 ms in that case.
	 *
	 * @author huanyy
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> speedAnalyze() {
		Map<String, Object> map = new HashMap<String, Object>();
		long time=0;
		boolean loaded=false;
		try {
			long start =System.currentTimeMillis();
			int code=HttpUtil.request(cururl, charset).getResponseCode();
			long end=System.currentTimeMillis();
			if(code==200){
				time=end-start;
				loaded=true;
			}
		} catch (Exception e) {
			System.out.println("loaderror");
		}
		int score=0;
		if(loaded){
			if(time<3*1000){
				score=2;
			}else if(time<6*1000){
				score=1;
			}
		}
		map.put("item", "是否可能出发网页打开速度过低降权");
		map.put("msg", "网站载入时间是"+time+"ms");
		map.put("score", score);
		return map;
	}

	/**
	 * Checks for wildcard DNS resolution, which others could exploit and cause
	 * a ranking penalty.
	 * <p>
	 * Resolves a random-looking sub-domain of the site domain. When that name
	 * resolves, the domain is assumed to use wildcard resolution.
	 * <p>
	 * The lookup goes through {@code java.net.InetAddress} instead of running
	 * {@code ping} in a shell: the host name is derived from the page URL, so
	 * passing it to a command line allowed command injection, and the check
	 * relied on Windows and on the English "Packets" output.
	 * <p>
	 * Scoring follows the convention used by the other checks (2 is good): no
	 * wildcard resolution scores 2, wildcard resolution scores 0. The values
	 * used to be the other way round.
	 * NOTE(review): confirm no caller relied on the previous score values.
	 * When the domain cannot be determined the "no wildcard" result is kept.
	 *
	 * @author huanyy
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> canBeUseAnalyze() {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "是否泛解析,是否可以被人利用导致降权");
		map.put("score", 2);
		map.put("msg", "不存在泛解析");

		String root = cururl == null ? null : getDomain(cururl);
		if(root == null){
			return map;
		}
		String probeHost = "23sdfjginkdkiug."+root;
		try{
			java.net.InetAddress.getByName(probeHost);
			// the made-up host resolved, so any sub-domain does
			map.put("score", 0);
			map.put("msg", "存在泛解析");
		}catch(java.net.UnknownHostException notResolved){
			// expected for a domain without wildcard resolution
		}catch(SecurityException denied){
			// lookups are not permitted here; keep the default result
		}
		return map;
	}

	/**
	 * Hidden IFRAME check.
	 * <p>
	 * Counts the iframe elements of the page: fewer than 3 score 2, 3 to 5
	 * score 1, 6 or more score 0.
	 * NOTE(review): every iframe is counted, not only hidden ones, although
	 * the message speaks of hidden iframes.
	 * <p>
	 * The elements themselves are counted. Splitting the iframe text used to
	 * report one iframe even for a page without any, because splitting an
	 * empty string still yields one part.
	 *
	 * @author huanyy
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> iframeAnalyze() {
		Map<String, Object> map = new HashMap<String, Object>();
		int len = new Html(html).$("iframe").allele().size();
		int score=0;
		if(len<3) score=2;
		else if(len<6) score=1;
		map.put("item", "隐藏IFRAME检查");
		map.put("msg", "共有"+len+"个隐藏iframe");
		map.put("score", score);
		return map;
	}

	/**
	 * HTML complexity analysis: could the markup hurt the rendering experience?
	 * <p>
	 * Adds the number of {@code body>div} elements to the number of divs
	 * selected inside them. More than 105 scores 1 (too complex), 81 to 105
	 * scores 2, 80 or fewer scores 1 (too simple).
	 *
	 * @author wangzeng
	 * @return result map with the keys "item", "msg" and "score"
	 * @throws CloneNotSupportedException if the selection cannot be cloned
	 */
	public Map<String, Object> complexAnalyze() throws CloneNotSupportedException {
		Html topDivs = (Html) new Html(this.html).$("body>div");
		int divTotal = topDivs.allele().size();
		divTotal += ((AbstractSelectable) topDivs.clone()).$("div").allele().size();

		final String verdict;
		final int points;
		if(divTotal <= 80){
			verdict = "HTML页面过于简单";
			points = 1;
		}else if(divTotal <= 105){
			verdict = "HTML页面复杂度优";
			points = 2;
		}else{
			verdict = "HTML页面过于复杂";
			points = 1;
		}

		Map<String, Object> result = new HashMap<String, Object>();
		result.put("item", "HTML复杂度分析");
		result.put("msg", verdict);
		result.put("score", points);
		return result;
	}

	/**
	 * ICP filing check.
	 * <p>
	 * Loads the filing lookup page for the domain and reads its
	 * {@code td.tablehead} cells. No cells score 0; cells including
	 * "审核时间" score 2; any other cells score 1. Whenever cells are found the
	 * {@code isbeian} flag is set to true.
	 * <p>
	 * When the lookup page cannot be loaded the "no filing information" result
	 * is returned instead of parsing a null document.
	 *
	 * @author wangxu
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> recordAnalyze() {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "是否备案");
		map.put("msg", "未检测到备案信息");
		map.put("score", 0);

		String url = "http://icp.aizhan.com/"+this.domain;
		String s = getHtml(url, "utf-8");
		if (s == null) {
			// getHtml reports a failed fetch as null
			return map;
		}
		List<String> list = new Html(s).$("td.tablehead").all();
		if (list!= null && list.size()>0) {
			if (list.contains("审核时间")) {
				map.put("msg", "已获得企业备案");
				map.put("score", 2);
			}else {
				map.put("msg", "已备案但未审核通过");
				map.put("score", 1);
			}
			setIsbeian(true);
		}
		return map;
	}

	/**
	 * Fake official site check.
	 * <p>
	 * Loads the filing lookup page for the domain; when its
	 * {@code td.tablehead} cells include "主办单位性质" the site scores 2,
	 * otherwise 0.
	 * <p>
	 * A lookup page that cannot be loaded, or that yields no cell list, gives
	 * the default result instead of raising NullPointerException.
	 *
	 * @author wangxu
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> untrueAnalyze() {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "是否虚假官网");
		map.put("msg", "不是企业官网");
		map.put("score", 0);

		String url = "http://icp.aizhan.com/"+this.domain;
		String s = getHtml(url, "utf-8");
		if (s == null) {
			// getHtml reports a failed fetch as null
			return map;
		}
		List<String> heads = new Html(s).$("td.tablehead").all();
		if (heads != null && heads.contains("主办单位性质")) {
			map.put("msg", "是企业官网");
			map.put("score", 2);
		}
		return map;
	}

	/**
	 * 是否存在死链接/错误链接
	 * @author  wangxu
	 * @param html
	 * @return
	 * @ 
	 */
	/*public Map<String, Object> dieLinkAnalyze(){
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "是否存在死链接/错误链接");
		map.put("msg", "未监测到死链接");
		map.put("score", 2);
		for (String link : links) {
			if(link.length()==0) continue;
			if(!isConn(link)){
				map.put("msg", "存在死链接"+link+"，可以优化");
				map.put("score", 0);
				break;
			}
		}
		return map;
	}*/

	/**
	 * Checks whether the 404 page guides the user back.
	 * <p>
	 * Requests a page name that should not exist. The check scores 2 when the
	 * answer has status 404 or when its body contains at least one anchor with
	 * an href, otherwise 0.
	 * <p>
	 * A failed request (no response) or a response without a body no longer
	 * raises NullPointerException; a failed request scores 0.
	 *
	 * @author wangxu
	 * @return result map with the keys "item", "msg" and "score"
	 */
	public Map<String, Object> backFrom404Analyze(){
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("item", "404页面是否能引导用户正确返回");
		String url = this.cururl+"/safhusaihfasnvsadnpovglasflkashviwqbiuhewip.html";
		HttpResponse res = null;
		try{
			res = HttpUtil.request(url, charset);
		}catch(Exception ignored){
			// treated below like any other missing response
		}

		boolean is404 = false;
		boolean hasGuide = false;
		if (res != null) {
			is404 = res.getResponseCode() == 404;
			String s = res.getResponseString();
			if (s != null) {
				String t = new Html(s).$("body").$("a").attr("href");
				hasGuide = t!=null && !t.equals("");
			}
		}

		if (is404 || hasGuide) {
			map.put("score", 2);
			map.put("msg", "404页面状态正确,存在引导，设计优秀");
		}else {
			map.put("score", 0);
			map.put("msg", "没有404状态，设计不是很优秀");
		}
		return map;
	}
	
	/**
	 * Returns a randomly chosen element of {@code urls}.
	 * <p>
	 * The set is walked up to the drawn position instead of being copied into
	 * a list on every call; the element returned for a given random number is
	 * the same as before.
	 *
	 * @param urls candidates; must not be empty
	 * @return one of the candidates
	 * @throws IllegalArgumentException if {@code urls} is empty (previously
	 *         raised by the random generator with a less helpful message)
	 */
	public String getRandomUrl(Set<String> urls){
		if(urls.isEmpty()){
			throw new IllegalArgumentException("urls must not be empty");
		}
		int target = random.nextInt(urls.size());
		Iterator<String> it = urls.iterator();
		for(int i = 0; i < target; i++){
			it.next();
		}
		return it.next();
	}
	
	/**
	 * Requests a URL and returns its HTML, using a per-instance cache.
	 * <p>
	 * Cache reads and writes both synchronize on the cache (the read used to
	 * be unsynchronized). The request itself runs outside the lock. Failed
	 * requests are not cached, so a later call tries again.
	 *
	 * @param url     URL to load
	 * @param charset character set used to decode the response
	 * @return the page source, or null when the request failed
	 */
	public String getHtml(String url, String charset){
		synchronized (urlCache) {
			String cached = urlCache.get(url);
			if(cached != null){
				return cached;
			}
		}
		String fetched = null;
		try{
			fetched = HttpUtil.getStr(url, charset);
		}catch(Exception ignored){
			// best effort: callers receive null for a page that cannot be loaded
		}
		if(fetched != null){
			synchronized (urlCache) {
				urlCache.put(url, fetched);
			}
		}
		return fetched;
	}
	
	/**
	 * Tells whether a URL can be reached over HTTP.
	 * <p>
	 * The connection is released after the check, connect and read are limited
	 * to 10 seconds each so a dead host cannot block forever, and URLs that do
	 * not open an HTTP connection (for example {@code ftp:}) yield false
	 * instead of a ClassCastException.
	 *
	 * @param _url URL to test
	 * @return true when the response code is below 400; false otherwise and on
	 *         any I/O error
	 */
	public boolean isConn(String _url) {
		HttpURLConnection urlcon = null;
		try {
			java.net.URLConnection raw = new URL(_url).openConnection();
			if(!(raw instanceof HttpURLConnection)){
				return false;
			}
			urlcon = (HttpURLConnection) raw;
			urlcon.setConnectTimeout(10000);
			urlcon.setReadTimeout(10000);
			return urlcon.getResponseCode() < 400;
		} catch (IOException e) {
			return false;
		} finally {
			if(urlcon != null){
				urlcon.disconnect();
			}
		}
	}
	
	
	/**
	 * Runs a command through {@code cmd /c} and returns its standard output.
	 * <p>
	 * The output is decoded as GBK and the lines are concatenated without
	 * separators. The output is read before waiting for the process to end:
	 * waiting first can block forever once the process fills the pipe buffer.
	 * The reader is always closed and an interrupt is restored on the thread.
	 * <p>
	 * NOTE(review): the command string is handed to the shell as is; callers
	 * must not build it from untrusted input.
	 *
	 * @param strCmd command line to execute
	 * @return the collected output; empty or partial when the command fails
	 */
	public static String executeCmd(String strCmd) {
		StringBuilder output = new StringBuilder();
		BufferedReader br = null;
		try {
			Process p = Runtime.getRuntime().exec("cmd /c " + strCmd);
			br = new BufferedReader(new InputStreamReader(p.getInputStream(),"GBK"));
			String line;
			while ((line = br.readLine()) != null) {
				output.append(line);
			}
			p.waitFor();
		} catch (InterruptedException e) {
			// keep the interrupt visible to the caller's thread
			Thread.currentThread().interrupt();
			e.printStackTrace();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			if (br != null) {
				try {
					br.close();
				} catch (IOException ignored) {
					// nothing useful can be done when closing fails
				}
			}
		}
		return output.toString();
	}
	
	/* Per-instance cache used by getHtml: URL -> fetched page source. */
	private Map<String, String> urlCache = new HashMap<String, String>();
	
	/* Shared random generator used by getRandomUrl. */
	static Random random = new Random();

}
